# -------------------------------------------------------------------
# Purpose: Creates Table 9
# Author:  Max Posch, 25/07/2025
# Usage:   Source this script to generate the table.
# -------------------------------------------------------------------
# Check that required paths exist
stopifnot(dir.exists(pdataanalysis))
stopifnot(dir.exists(poutput))


# Load data
load(file.path(pdataanalysis, "countyLevel19001940.RData"))


# Regressions
o <- list()
o <- append(o, list(feols(sum_patents_pc_1900_f_w ~ entropy_namelast_mp_adjp_ws + entropy_namelast_mp_adjp_100m_ws + sum_n_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_100m_ws | gisjoin_1900 + statefip^year + gisjoin_1900[year], countyLevel19001940)))
o <- append(o, list(feols(sum_patents_pc_1900_f_w ~ entropy_namelast_mp_adjp_ws + entropy_namelast_mp_adjp_100m_ws + entropy_namelast_mp_adjp_100_200m_ws + sum_n_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_100m_ws + sum_n_namelast_mp_adjp_100_200m_ws | gisjoin_1900 + statefip^year + gisjoin_1900[year], countyLevel19001940)))
o <- append(o, list(feols(sum_patents_pc_1900_f_w ~ entropy_namelast_mp_adjp_ws + entropy_namelast_mp_adjp_100m_ws + entropy_namelast_mp_adjp_100_200m_ws + entropy_namelast_mp_adjp_200_300m_ws + sum_n_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_100m_ws + sum_n_namelast_mp_adjp_100_200m_ws + sum_n_namelast_mp_adjp_200_300m_ws | gisjoin_1900 + statefip^year + gisjoin_1900[year], countyLevel19001940)))
o <- append(o, list(feols(sum_break_p80_rrfsim05_pc_1900_f_w ~ entropy_namelast_mp_adjp_ws + entropy_namelast_mp_adjp_100m_ws + sum_n_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_100m_ws | gisjoin_1900 + statefip^year + gisjoin_1900[year], countyLevel19001940)))
o <- append(o, list(feols(sum_break_p80_rrfsim05_pc_1900_f_w ~ entropy_namelast_mp_adjp_ws + entropy_namelast_mp_adjp_100m_ws + entropy_namelast_mp_adjp_100_200m_ws + sum_n_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_100m_ws + sum_n_namelast_mp_adjp_100_200m_ws | gisjoin_1900 + statefip^year + gisjoin_1900[year], countyLevel19001940)))
o <- append(o, list(feols(sum_break_p80_rrfsim05_pc_1900_f_w ~ entropy_namelast_mp_adjp_ws + entropy_namelast_mp_adjp_100m_ws + entropy_namelast_mp_adjp_100_200m_ws + entropy_namelast_mp_adjp_200_300m_ws + sum_n_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_100m_ws + sum_n_namelast_mp_adjp_100_200m_ws + sum_n_namelast_mp_adjp_200_300m_ws | gisjoin_1900 + statefip^year + gisjoin_1900[year], countyLevel19001940)))

r <- list()
r <- append(r, list(feols(sum_patents_pc_1900_f_w ~ iv_lo_entropy_namelast_mp_adjp_fe_immig_ws + iv_lo_entropy_namelast_mp_adjp_fe_immig_100m_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100m_ws | gisjoin_1900 + statefip^year + gisjoin_1900[year], countyLevel19001940)))
r <- append(r, list(feols(sum_patents_pc_1900_f_w ~ iv_lo_entropy_namelast_mp_adjp_fe_immig_ws + iv_lo_entropy_namelast_mp_adjp_fe_immig_100m_ws + iv_lo_entropy_namelast_mp_adjp_fe_immig_100_200m_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100m_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100_200m_ws | gisjoin_1900 + statefip^year + gisjoin_1900[year], countyLevel19001940)))
r <- append(r, list(feols(sum_patents_pc_1900_f_w ~ iv_lo_entropy_namelast_mp_adjp_fe_immig_ws + iv_lo_entropy_namelast_mp_adjp_fe_immig_100m_ws + iv_lo_entropy_namelast_mp_adjp_fe_immig_100_200m_ws + iv_lo_entropy_namelast_mp_adjp_fe_immig_200_300m_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100m_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100_200m_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_200_300m_ws | gisjoin_1900 + statefip^year + gisjoin_1900[year], countyLevel19001940)))
r <- append(r, list(feols(sum_break_p80_rrfsim05_pc_1900_f_w ~ iv_lo_entropy_namelast_mp_adjp_fe_immig_ws + iv_lo_entropy_namelast_mp_adjp_fe_immig_100m_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100m_ws | gisjoin_1900 + statefip^year + gisjoin_1900[year], countyLevel19001940)))
r <- append(r, list(feols(sum_break_p80_rrfsim05_pc_1900_f_w ~ iv_lo_entropy_namelast_mp_adjp_fe_immig_ws + iv_lo_entropy_namelast_mp_adjp_fe_immig_100m_ws + iv_lo_entropy_namelast_mp_adjp_fe_immig_100_200m_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100m_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100_200m_ws | gisjoin_1900 + statefip^year + gisjoin_1900[year], countyLevel19001940)))
r <- append(r, list(feols(sum_break_p80_rrfsim05_pc_1900_f_w ~ iv_lo_entropy_namelast_mp_adjp_fe_immig_ws + iv_lo_entropy_namelast_mp_adjp_fe_immig_100m_ws + iv_lo_entropy_namelast_mp_adjp_fe_immig_100_200m_ws + iv_lo_entropy_namelast_mp_adjp_fe_immig_200_300m_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100m_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100_200m_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_200_300m_ws | gisjoin_1900 + statefip^year + gisjoin_1900[year], countyLevel19001940)))

i <- list()
i <- append(i, list(feols(sum_patents_pc_1900_f_w ~ 1 | gisjoin_1900 + statefip^year | entropy_namelast_mp_adjp_ws + entropy_namelast_mp_adjp_100m_ws + sum_n_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_100m_ws ~ iv_lo_entropy_namelast_mp_adjp_fe_immig_ws + iv_lo_entropy_namelast_mp_adjp_fe_immig_100m_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100m_ws + gisjoin_1900[year], countyLevel19001940)))
i <- append(i, list(feols(sum_patents_pc_1900_f_w ~ 1 | gisjoin_1900 + statefip^year | entropy_namelast_mp_adjp_ws + entropy_namelast_mp_adjp_100m_ws + entropy_namelast_mp_adjp_100_200m_ws + sum_n_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_100m_ws + sum_n_namelast_mp_adjp_100_200m_ws ~ iv_lo_entropy_namelast_mp_adjp_fe_immig_ws + iv_lo_entropy_namelast_mp_adjp_fe_immig_100m_ws + iv_lo_entropy_namelast_mp_adjp_fe_immig_100_200m_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100m_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100_200m_ws + gisjoin_1900[year], countyLevel19001940)))
i <- append(i, list(feols(sum_patents_pc_1900_f_w ~ 1 | gisjoin_1900 + statefip^year | entropy_namelast_mp_adjp_ws + entropy_namelast_mp_adjp_100m_ws + entropy_namelast_mp_adjp_100_200m_ws + entropy_namelast_mp_adjp_200_300m_ws + sum_n_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_100m_ws + sum_n_namelast_mp_adjp_100_200m_ws + sum_n_namelast_mp_adjp_200_300m_ws ~ iv_lo_entropy_namelast_mp_adjp_fe_immig_ws + iv_lo_entropy_namelast_mp_adjp_fe_immig_100m_ws + iv_lo_entropy_namelast_mp_adjp_fe_immig_100_200m_ws + iv_lo_entropy_namelast_mp_adjp_fe_immig_200_300m_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100m_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100_200m_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_200_300m_ws + gisjoin_1900[year], countyLevel19001940)))
i <- append(i, list(feols(sum_break_p80_rrfsim05_pc_1900_f_w ~ 1 | gisjoin_1900 + statefip^year | entropy_namelast_mp_adjp_ws + entropy_namelast_mp_adjp_100m_ws + sum_n_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_100m_ws ~ iv_lo_entropy_namelast_mp_adjp_fe_immig_ws + iv_lo_entropy_namelast_mp_adjp_fe_immig_100m_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100m_ws + gisjoin_1900[year], countyLevel19001940)))
i <- append(i, list(feols(sum_break_p80_rrfsim05_pc_1900_f_w ~ 1 | gisjoin_1900 + statefip^year | entropy_namelast_mp_adjp_ws + entropy_namelast_mp_adjp_100m_ws + entropy_namelast_mp_adjp_100_200m_ws + sum_n_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_100m_ws + sum_n_namelast_mp_adjp_100_200m_ws ~ iv_lo_entropy_namelast_mp_adjp_fe_immig_ws + iv_lo_entropy_namelast_mp_adjp_fe_immig_100m_ws + iv_lo_entropy_namelast_mp_adjp_fe_immig_100_200m_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100m_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100_200m_ws + gisjoin_1900[year], countyLevel19001940)))
i <- append(i, list(feols(sum_break_p80_rrfsim05_pc_1900_f_w ~ 1 | gisjoin_1900 + statefip^year | entropy_namelast_mp_adjp_ws + entropy_namelast_mp_adjp_100m_ws + entropy_namelast_mp_adjp_100_200m_ws + entropy_namelast_mp_adjp_200_300m_ws + sum_n_namelast_mp_adjp_ws + sum_n_namelast_mp_adjp_100m_ws + sum_n_namelast_mp_adjp_100_200m_ws + sum_n_namelast_mp_adjp_200_300m_ws ~ iv_lo_entropy_namelast_mp_adjp_fe_immig_ws + iv_lo_entropy_namelast_mp_adjp_fe_immig_100m_ws + iv_lo_entropy_namelast_mp_adjp_fe_immig_100_200m_ws + iv_lo_entropy_namelast_mp_adjp_fe_immig_200_300m_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100m_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100_200m_ws + iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_200_300m_ws + gisjoin_1900[year], countyLevel19001940)))


# F-statistics
dstata <- countyLevel19001940[, .(
  x1 = entropy_namelast_mp_adjp_ws, z1 = iv_lo_entropy_namelast_mp_adjp_fe_immig_ws,
  x2 = entropy_namelast_mp_adjp_100m_ws, z2 = iv_lo_entropy_namelast_mp_adjp_fe_immig_100m_ws,
  x3 = entropy_namelast_mp_adjp_100_200m_ws, z3 = iv_lo_entropy_namelast_mp_adjp_fe_immig_100_200m_ws,
  x4 = entropy_namelast_mp_adjp_200_300m_ws, z4 = iv_lo_entropy_namelast_mp_adjp_fe_immig_200_300m_ws,
  x5 = sum_n_namelast_mp_adjp_ws, z5 = iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws,
  x6 = sum_n_namelast_mp_adjp_100m_ws, z6 = iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100m_ws,
  x7 = sum_n_namelast_mp_adjp_100_200m_ws, z7 = iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100_200m_ws,
  x8 = sum_n_namelast_mp_adjp_200_300m_ws, z8 = iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_200_300m_ws,
  y1 = sum_patents_pc_1900_f_w, y2 = sum_break_p80_rrfsim05_pc_1900_f_w,
  gisjoin_1900_f, statefip_f, year_f, year_num
)]
commands <- list(
  'ivreghdfe y1 (x1 x2 x5 x6= z1 z2 z5 z6), absorb(gisjoin_1900_f year_f#statefip_f c.year_num##gisjoin_1900_f) cluster(statefip_f) first ffirst savefirst
   gen swf1 = .
   gen swf2 = .
   gen swf3 = .
   gen swf4 = .
   replace swf1 = round(e(first)["SWF",1])
   replace swf2 = round(e(first)["SWF",2])
   replace swf3 = round(e(first)["SWF",3])
   replace swf4 = round(e(first)["SWF",4])
   keep swf*
   keep if _n == 1',
  'ivreghdfe y1 (x1 x2 x3 x5 x6 x7= z1 z2 z3 z5 z6 z7), absorb(gisjoin_1900_f year_f#statefip_f c.year_num##gisjoin_1900_f) cluster(statefip_f) first ffirst savefirst
   gen swf1 = .
   gen swf2 = .
   gen swf3 = .
   gen swf4 = .
   gen swf5 = .
   gen swf6 = .
   replace swf1 = round(e(first)["SWF",1])
   replace swf2 = round(e(first)["SWF",2])
   replace swf3 = round(e(first)["SWF",3])
   replace swf4 = round(e(first)["SWF",4])
   replace swf5 = round(e(first)["SWF",5])
   replace swf6 = round(e(first)["SWF",6])
   keep swf*
   keep if _n == 1',
  'ivreghdfe y1 (x1 x2 x3 x4 x5 x6 x7 x8 = z1 z2 z3 z4 z5 z6 z7 z8), absorb(gisjoin_1900_f year_f#statefip_f c.year_num##gisjoin_1900_f) cluster(statefip_f) first ffirst savefirst
   gen swf1 = .
   gen swf2 = .
   gen swf3 = .
   gen swf4 = .
   gen swf5 = .
   gen swf6 = .
   gen swf7 = .
   gen swf8 = .
   replace swf1 = round(e(first)["SWF",1])
   replace swf2 = round(e(first)["SWF",2])
   replace swf3 = round(e(first)["SWF",3])
   replace swf4 = round(e(first)["SWF",4])
   replace swf5 = round(e(first)["SWF",5])
   replace swf6 = round(e(first)["SWF",6])
   replace swf7 = round(e(first)["SWF",7])
   replace swf8 = round(e(first)["SWF",8])
   keep swf*
   keep if _n == 1'
)
swf_results <- as.character(unlist(lapply(commands, get_fstat_from_stata, data.in = dstata)))
swfstat <- swf_results %>% str_split("; ", simplify = TRUE)
swfstat_diversity <- c(paste(swfstat[1, 1:2], collapse = "; "), paste(swfstat[2, 1:3], collapse = "; "), paste(swfstat[3, 1:4], collapse = "; "))
swfstat_population <- c(paste(swfstat[1, 3:4], collapse = "; "), paste(swfstat[2, 4:6], collapse = "; "), paste(swfstat[3, 5:8], collapse = "; "))
swfstat_diversity <- rep(swfstat_diversity, times = 2)
swfstat_population <- rep(swfstat_population, times = 2)


# Create table
y1_mean <- round(mean(countyLevel19001940$sum_patents_pc_1900_f_w), 2)
y1_sd <- round(sd(countyLevel19001940$sum_patents_pc_1900_f_w), 2)
y2_mean <- round(mean(countyLevel19001940$sum_break_p80_rrfsim05_pc_1900_f_w), 2)
y2_sd <- round(sd(countyLevel19001940$sum_break_p80_rrfsim05_pc_1900_f_w), 2)
y1 <- paste0("\\makecell{Patents \\\\ per 1,000 people \\\\ (mean = ", y1_mean, ", sd = ", y1_sd, ")}")
y2 <- paste0("\\makecell{Breakthrough patents \\\\ per 1,000 people \\\\ (mean = ", y2_mean, ", sd = ", y2_sd, ")}")

setFixest_dict(
  c(
    entropy_namelast_mp_adjp_ws = "Surname diversity",
    entropy_namelast_mp_adjp_100m_ws = "Surname diversity ($<$ 100 miles)",
    entropy_namelast_mp_adjp_100_200m_ws = "Surname diversity (100 $<$ 200 miles)",
    entropy_namelast_mp_adjp_200_300m_ws = "Surname diversity (200 $<$ 300 miles)",
    iv_lo_entropy_namelast_mp_adjp_fe_immig_ws = "Predicted surname diversity",
    iv_lo_entropy_namelast_mp_adjp_fe_immig_100m_ws = "Predicted surname diversity ($<$ 100 miles)",
    iv_lo_entropy_namelast_mp_adjp_fe_immig_100_200m_ws = "Predicted surname diversity (100 $<$ 200 miles)",
    iv_lo_entropy_namelast_mp_adjp_fe_immig_200_300m_ws = "Predicted surname diversity (200 $<$ 300 miles)",
    sum_n_namelast_mp_adjp_ws = "Population",
    sum_n_namelast_mp_adjp_100m_ws = "Population ($<$ 100 miles)",
    sum_n_namelast_mp_adjp_100_200m_ws = "Population (100 $<$ 200 miles)",
    sum_n_namelast_mp_adjp_200_300m_ws = "Population (200 $<$ 300 miles)",
    iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_ws = "Predicted population",
    iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100m_ws = "Predicted population ($<$ 100 miles)",
    iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_100_200m_ws = "Predicted population (100 $<$ 200 miles)",
    iv_lo_sum_n_namelast_mp_adjp_fe_immig_tr_200_300m_ws = "Predicted population (200 $<$ 300 miles)",
    sum_patents_pc_1900_f_w = y1, sum_break_p80_rrfsim05_pc_1900_f_w = y2,
    year = "Period", statefip = "State", namelast_mp = "Surname", gisjoin_1900 = "County"
  )
)

tablename <- file.path(poutput, "table09.tex")
etable(o,
  cluster = ~statefip,
  fitstat = ~n,
  digits = "r3", digits.stats = "r3",
  order = c("diversity"),
  file = tablename, replace = TRUE,
  style.tex = style.tex("aer"), tex = TRUE
)
edit_table_content_fixed(tablename, "Period $\\times $ County", "County-specific linear trends")
add_table_row(tablename, "\\midrule", "\\multicolumn{2}{l}{\\textit{Panel A: Least-squares estimates}} &  \\multicolumn{5}{c}{}\\\\ \\cmidrule(lr){1-7}")
add_table_row(tablename, "mean =", "\\cmidrule(lr){2-4}  \\cmidrule(lr){5-7}")
move_table_row(tablename, "Observations", "bottomrule")
add_table_row(tablename, "    \\\\", c("\\multicolumn{2}{l}{\\textit{Panel B: Reduced-form estimates}} &  \\multicolumn{5}{c}{}\\\\", "\\\\", "\\multicolumn{2}{l}{\\textit{Panel C: Instrumental-variable estimates}} &  \\multicolumn{5}{c}{}\\\\"))

temptable <- file.path(poutput, "temp.tex")
etable(r,
  cluster = ~statefip,
  fitstat = ~n,
  digits = "r3", digits.stats = "r3",
  order = c("diversity"),
  file = temptable, replace = TRUE,
  style.tex = style.tex("aer"), tex = TRUE
)
estimates_rows <- get_estimates_rows(temptable)
add_table_row(tablename, "Panel B", c("\\cmidrule(lr){1-7}", estimates_rows))

temptable <- file.path(poutput, "temp.tex")
etable(i,
  cluster = ~statefip,
  fitstat = ~n,
  digits = "r3", digits.stats = "r3",
  order = c("diversity"),
  extralines = list(
    "Sanderson-Windmeijer \\textit{F}-stat: Surname diversity" = swfstat_diversity, 
    "Sanderson-Windmeijer \\textit{F}-stat: Population" = swfstat_population),
  file = temptable, replace = TRUE,
  style.tex = style.tex("aer"), tex = TRUE
)
estimates_rows <- get_estimates_rows(temptable)
add_table_row(tablename, "Panel C", c("\\cmidrule(lr){1-7}", estimates_rows))
add_table_row(tablename, "stat: Surname diversity", "\\\\", "before")
file.remove(temptable)

cat("Table 9 saved to:", tablename, "\n")